import scrapy
from qiubaiPro.items import QiubaiproItem

class QiubaiSpider(scrapy.Spider):
    """Spider that collects the author name and text of each listed post.

    Every post found on a start page is emitted as a ``QiubaiproItem``
    carrying an ``author`` and a ``content`` field.
    """

    name = 'qiubai'
    # allowed_domains = ['www.***.com']
    start_urls = ['https://www.qiushibaike.com/text/']

    def parse(self, response):
        """Extract author and text from every post block in ``response``.

        Args:
            response: The downloaded page; it must provide ``.xpath()``.

        Yields:
            QiubaiproItem: One item per post block. ``author`` is an empty
            string when the author node is not present in the block.
        """
        div_list = response.xpath('//div[@class="col1 old-style-col1"]/div')
        for div in div_list:
            # xpath() returns a list of Selector objects. Indexing it with
            # [0] raises IndexError when nothing matched, which would abort
            # this generator and lose the rest of the page, so take the first
            # match with a fallback instead.
            author = div.xpath('./div[1]/a[2]/h2/text()').extract_first(default='')
            # The post body may be split across several text nodes; join them.
            content = ''.join(div.xpath('./a[1]/div/span[1]//text()').extract())

            # A child <div> that produced neither field is not a post entry.
            if not author and not content:
                continue

            # Populate the item and yield it so Scrapy passes it on to the
            # item pipelines.
            item = QiubaiproItem()
            item['author'] = author
            item['content'] = content
            yield item


















